# Copyright: 2008 MoinMoin:BastianBlank
# Copyright: 2010-2011 MoinMoin:ThomasWaldmann
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

"""
MoinMoin - Include handling

Expands the Include macro and transclusion elements in an internal Moin document. Note
these two forms produce similar results:

    {{jpeg.jpg}}
    <<Include(jpeg.jpg)>>

However, the Include macro has many optional parameters to tailor the output.
See the docs for an explanation of include arguments.

The Include macro is the only macro processed here. All other macros are left as is
and will be processed later. See /items/content.py, _expand_document method.

Adjusting the DOM
=================

After expanding the include elements, in many cases it is necessary to adjust
the DOM to prevent the generation of invalid HTML.  Using a simple example,
"\n{{SomeItem}}\n", the starting DOM structure created by the moinwiki_in.py
(or other parser) is::

    Page > Body > P > Include

After expansion of the Include, the structure will be::

    Page > Body > P > Page > Body > (P | Div | Object |...)

moinwiki_in.py (or other parser) does not adjust the DOM structure based upon
whether the contents of the transcluded item are inline or block.  Sometime after
include processing is complete, html_out.py will convert the transcluded
Body > Page into a Div or Span wrapping the transclusion contents.

This works well for things like "\n||mytable||{{BlockOrInline}}||\n" where
almost any type of element is valid within a table cell's td.

But without DOM adjustment, "\n{{Block}}\n" will generate invalid HTML
because html_out.py will convert the DOM structure::

    Page > Body > P > Page > Body > (Pre | Div | P, P... | ...)

into::

    ...<body><p><div>...</div></p></body>...

where the </p> is invalid.

In some cases it is desirable to coerce a transcluded small image or phrase into an
inline element embedded within a paragraph. Here html_out.py will wrap the transclusion in
a Span rather than a Div, or convert a P-tag containing a phrase into a Span::

    "My pet {{bird.jpg}} flys.", "[[SomePage|{{Logo.png}}]]" or "Yes, we have {{no}} bananas."

In complex cases where a block level item is transcluded within the midst of
several levels of text markup, such as::

   "plain ''italic '''bold {{BlockItem}} bold''' italic'' plain"

then we must avoid generating invalid html like::

    <p>plain <emphasis>italic <strong>bold <div>
    ...</div> bold</strong> italic</emphasis> plain</p>

where <div>...</div> contains the transcluded item, but rather::

    <p>plain <emphasis>italic <strong>bold</strong></emphasis></p><div>
    ...</div><p><emphasis><strong> bold</strong> italic</emphasis> plain</p>

In these complex cases, we must build a DOM structure that will replace
the containing element's parent, grand-parent, great-grand-parent...

When a block element is embedded within a comment, it is important that the
class="comment" is copied to the transclusion to provide the show/hide and
highlighted styles normally applied to comments::

    /* normal ''italic ~-small {{detail.csv}} small-~ italic'' normal */

Conveniently, the class="comment" is added to the span element within the
moinwiki_in.py parser and is available to include.py.  However, the moin-big
and moin-small classes are applied to span elements by html_out.py so those
classes are not available.  Italic, bold, stroke, and underline styling
effects are implemented through specialized tags rather than CSS classes.
In the example above, only class="comment" will be applied to detail.csv.
"""

from emeraldtree import ElementTree as ET
import re
import copy

from flask import current_app as app
from flask import g as flaskg

from whoosh.query import Term, And, Regex

from moin.constants.keys import NAME_EXACT, WIKINAME
from moin.items import Item
from moin.utils import close_file
from moin.utils.iri import Iri, IriPath
from moin.utils.tree import html, moin_page, xinclude, xlink
from moin.utils.mime import type_moin_document
from moin.converters.html_out import mark_item_as_transclusion, Attributes
from moin.i18n import _

from . import default_registry
from ._args import Arguments

from moin import log
logging = log.getLogger(__name__)


# Tag names of elements produced by moin wiki markup that must not receive
# block-level children; the include converter consults this list to decide
# when the DOM has to be restructured around a transclusion.
NO_BLOCK_CHILDREN = [
    # paragraph
    'p',
    # /*comment*/, ~+big+~, ~-small-~ via classes comment, moin-big, moin-small
    'span',
    # ''italic''
    'emphasis',
    # '''bold'''
    'strong',
    # --(stroke)--
    'del',
    # __underline__
    'ins',
    # 'sub' (,,subscript,,) and 'sup' (^superscript^) are intentionally not
    # listed: no markup is allowed within subscripts or superscripts
    # [[SomeItem|{{logo.png}}]]
    'a',
]


class XPointer(list):
    """
    Simple XPointer parser

    parses strings like 'xmlns(page=http://moinmo.in/namespaces/page)page:include(pages(^^pn))'

    The instance is a list of Entry objects, one per top-level ``name(data)``
    part of the input. Nested brackets are kept verbatim inside ``data``; a
    bare name without brackets yields an entry whose ``data`` is None.

    Brackets and the circumflex are escaped with a leading circumflex
    (``^(``, ``^)``, ``^^``); escapes are preserved in ``Entry.data`` and
    removed by ``Entry.data_unescape``.

    Malformed input is handled leniently instead of raising:

    - a closing bracket without a matching opening bracket is ignored
    - brackets still open at the end of the input are closed implicitly,
      so ``a(b(c`` is parsed like ``a(b(c))``
    """

    tokenizer_rules = r"""
        # Match escaped syntax elements
        \^[()^]
        |
        (?P<bracket_open> \( )
        |
        (?P<bracket_close> \) )
        |
        (?P<whitespace> \s+ )
        |
        # Anything else
        [^()^]+
    """
    tokenizer_re = re.compile(tokenizer_rules, re.X)

    class Entry:
        """One top-level part of an XPointer: a name and its (still escaped) data."""
        __slots__ = 'name', 'data'

        def __init__(self, name, data):
            self.name, self.data = name, data

        @property
        def data_unescape(self):
            """Return data with the ^(, ^) and ^^ escapes removed, or None if there is no data."""
            if self.data is None:
                # bare names carry no data, there is nothing to unescape
                return None
            data = self.data.replace('^(', '(').replace('^)', ')')
            return data.replace('^^', '^')

    def __init__(self, input):
        """
        Tokenize input and fill the list with Entry objects.

        :param input: the xpointer string to parse
        """
        name = []  # tokens of the name currently being collected
        stack = []  # one token list per currently open bracket level

        for match in self.tokenizer_re.finditer(input):
            if match.group('bracket_open'):
                stack.append([])
            elif match.group('bracket_close'):
                # a stray closing bracket has nothing to close; ignore it
                # rather than failing with an IndexError on the empty stack
                if stack:
                    self._close_bracket(stack, name)
            else:
                if stack:
                    # inside brackets everything, including whitespace, is data
                    stack[-1].append(match.group())
                elif not match.group('whitespace'):
                    name.append(match.group())

        # implicitly close brackets left open at the end of the input so the
        # data collected for them is not lost
        while stack:
            self._close_bracket(stack, name)

        if name:
            self.append(self.Entry(''.join(name), None))

    def _close_bracket(self, stack, name):
        """
        Close the innermost open bracket level.

        A nested level is folded back, with its brackets, into the data of the
        enclosing level. Closing the outermost level completes an entry: it is
        appended to self and name is emptied in place for the next entry.
        """
        top = stack.pop()
        if stack:
            stack[-1].append('(')
            stack[-1].extend(top)
            stack[-1].append(')')
        else:
            self.append(self.Entry(''.join(name), ''.join(top)))
            del name[:]


class Converter:
    """
    Expand xinclude elements (transclusions and Include macros) within a moin DOM tree.

    An instance is callable: calling it with a tree expands all include elements
    in place and returns the same tree. Transclusion loops are detected with
    self.stack, which holds the page hrefs of the documents currently being
    processed (or None for elements that do not start a new document).
    """

    @classmethod
    def _factory(cls, input, output, includes=None, **kw):
        """Return a converter instance if includes == 'expandall', otherwise None."""
        if includes == 'expandall':
            return cls()
        return None

    @staticmethod
    def _select_items(pagelist, skipitems=None, items=None):
        """
        Apply the skipitems and items arguments of the Include macro to a list of item names.

        :param pagelist: list of item names, already sorted
        :param skipitems: number of leading items to drop, None to drop nothing
        :param items: maximum number of items to keep after skipping, None for no limit
        :returns: the selected part of pagelist
        """
        if skipitems is not None:
            pagelist = pagelist[skipitems:]
        if items is not None:
            pagelist = pagelist[:items]
        return pagelist

    def recurse(self, elem, page_href):
        """
        Recursively process elem and its descendants, expanding include elements.

        :param elem: the element to process; on first call elem.tag.name == 'page'
        :param page_href: Iri of the document that contains elem, None on first call
        :returns: one of

            - None if elem is not an include and needs no replacement by the caller
              (also returned for an include that produced no elements)
            - a node that the caller inserts in place of the include element
            - a list [container, index] where container is a list of nodes that the
              caller uses to replace elem (or an ancestor of elem) and index is the
              position of the transclusion within container
        :raises ValueError: if an include href uses a non-local authority or a
            scheme other than wiki or wiki.local
        """
        # on first call, elem.tag.name=='page'.
        # Descendants (body, div, p, include, page, etc.) are processed by recursing through DOM

        # stack is used to detect transclusion loops
        page_href_new = elem.get(moin_page.page_href)
        if page_href_new:
            page_href_new = Iri(page_href_new)
            if page_href_new != page_href:
                page_href = page_href_new
                self.stack.append(page_href)
            else:
                self.stack.append(None)
        else:
            self.stack.append(None)

        try:
            if elem.tag == xinclude.include:
                # we have already recursed several levels and found a transclusion: "{{SomePage}}" or <<Include(...)>>
                # process the transclusion and add it to the DOM.  Subsequent recursions will traverse through
                # the transclusion's elements.
                href = elem.get(xinclude.href)
                xpointer = elem.get(xinclude.xpointer)

                xp_include_pages = None
                xp_include_sort = None
                xp_include_items = None
                xp_include_skipitems = None
                xp_include_heading = None
                xp_include_level = None

                if xpointer:
                    # we are working on an <<Include(abc)>> macro, not a {{transclusion}}
                    xp = XPointer(xpointer)
                    xp_include = None
                    xp_namespaces = {}
                    for entry in xp:
                        uri = None
                        name = entry.name.split(':', 1)
                        if len(name) > 1:
                            prefix, name = name
                            # False (not None) marks an undeclared prefix, so a prefixed
                            # "xmlns" is never mistaken for a namespace declaration below
                            uri = xp_namespaces.get(prefix, False)
                        else:
                            name = name[0]

                        if uri is None and name == 'xmlns':
                            d_prefix, d_uri = entry.data.split('=', 1)
                            xp_namespaces[d_prefix] = d_uri
                        elif uri == moin_page.namespace and name == 'include':
                            xp_include = XPointer(entry.data)

                    if xp_include:
                        for entry in xp_include:
                            name, data = entry.name, entry.data_unescape
                            # TODO: These do not include all parameters in moin 1.9 Include macro docs:
                            # <<Include(pagename, heading, level, from="regex", to="regex", sort=ascending|descending,
                            #           items=n, skipitems=n, titlesonly, editlink)>>
                            # these are currently unsupported in moin 2.0: from, to, titlesonly, editlink
                            if name == 'pages':  # pages == pagename in moin 1.9
                                xp_include_pages = data
                            elif name == 'sort':
                                xp_include_sort = data
                            elif name == 'items':
                                xp_include_items = int(data)
                            elif name == 'skipitems':
                                xp_include_skipitems = int(data)
                            elif name == 'heading':
                                xp_include_heading = data
                            elif name == 'level':
                                xp_include_level = data

                included_elements = []
                # stays empty if the include has neither a href nor a pages pattern,
                # in that case nothing is included and None is returned below
                pages = ()
                if href:
                    # We have a single page to transclude or include
                    href = Iri(href)
                    link = Iri(scheme='wiki', authority='')
                    if href.scheme == 'wiki':
                        if href.authority:
                            raise ValueError("can't handle xinclude for non-local authority")
                        else:
                            path = href.path[1:]
                    elif href.scheme == 'wiki.local':
                        page = page_href
                        path = href.path
                        if path[0] == '':
                            # /subitem
                            tmp = page.path[1:]
                            tmp.extend(path[1:])
                            path = tmp
                        elif path[0] == '..':
                            # ../sisteritem
                            path = page.path[1:] + path[1:]
                    else:
                        raise ValueError("can't handle xinclude for schemes other than wiki or wiki.local")

                    link.path = path

                    if flaskg.user.may.read(str(path)):
                        page = Item.create(str(path))
                        pages = ((page, link), )
                    else:
                        # ACLs prevent user from viewing a transclusion - show message
                        # children must be a tuple holding the message, not the bare string
                        message = moin_page.p(children=(_('Access Denied, transcluded content suppressed.'), ))
                        attrib = {html.class_: 'warning moin-read-denied'}
                        div = ET.Element(moin_page.div, attrib, children=(message, ))
                        container = ET.Element(moin_page.body, children=(div, ))
                        return [container, 0]  # replace transclusion with container's child

                elif xp_include_pages:
                    # we have regex of pages to include:  <<Include(^qqq)>>
                    query = And([Term(WIKINAME, app.cfg.interwikiname), Regex(NAME_EXACT, xp_include_pages)])
                    reverse = xp_include_sort == 'descending'
                    results = flaskg.storage.search(query, sortedby=NAME_EXACT, reverse=reverse, limit=None)
                    pagelist = [result.fqname.fullname for result in results]
                    # skipitems drops leading matches, items limits how many of the rest are included
                    pagelist = self._select_items(pagelist, xp_include_skipitems, xp_include_items)
                    pages = ((Item.create(p), Iri(scheme='wiki', authority='', path='/' + p)) for p in pagelist)
                    if not pagelist:
                        msg = _('Error: no items found matching "<<Include({0})>>"').format(xp_include_pages)
                        attrib = {html.class_: 'moin-error'}
                        strong = ET.Element(moin_page.strong, attrib, (msg, ))
                        included_elements.append(strong)

                for page, p_href in pages:
                    if p_href.path[0] != '/':
                        p_href.path = IriPath('/' + '/'.join(p_href.path))
                    if p_href in self.stack:
                        # we have a transclusion loop, create an error message showing list of pages forming loop
                        loop = self.stack[self.stack.index(p_href):]
                        loop = ['{0}'.format(ref.path[1:]) for ref in loop if ref is not None] + [page.name]
                        msg = 'Error: Transclusion loop via: ' + ', '.join(loop)
                        attrib = {html.class_: 'moin-error'}
                        strong = ET.Element(moin_page.strong, attrib, (msg, ))
                        included_elements.append(strong)
                        continue

                    if xp_include_heading is not None:
                        # an empty heading argument means: use the item name as heading text
                        attrib = {xlink.href: p_href}
                        children = (xp_include_heading or page.name, )
                        elem_a = ET.Element(moin_page.a, attrib, children=children)
                        attrib = {moin_page.outline_level: xp_include_level or '1'}
                        elem_h = ET.Element(moin_page.h, attrib, children=(elem_a, ))
                        included_elements.append(elem_h)

                    page_doc = page.content.internal_representation(attributes=Arguments(keyword=elem.attrib))
                    close_file(page.rev.data)

                    # expand includes nested within the included document
                    self.recurse(page_doc, page_href)

                    page_doc = mark_item_as_transclusion(page_doc, page)
                    included_elements.append(page_doc)

                if len(included_elements) > 1:
                    # use a div as container
                    result = ET.Element(moin_page.div)
                    result.extend(included_elements)
                elif included_elements:
                    result = included_elements[0]
                else:
                    result = None
                #  end of processing for transclusion; the "result" will get inserted into the DOM below
                return result

            # Traverse the DOM by calling self.recurse with each child of the current elem.
            # Starting elem.tag.name=='page'.
            container = []
            i = 0
            while i < len(elem):
                child = elem[i]
                if isinstance(child, ET.Node):

                    ret = self.recurse(child, page_href)

                    if ret:
                        # Either child or a descendant of child is a transclusion.
                        # See top of this script for notes on why these DOM adjustments are required.
                        if isinstance(ret, ET.Node) and elem.tag.name in NO_BLOCK_CHILDREN:
                            body = ret[0]
                            if len(body) == 0:
                                # the transcluded item is empty, insert an empty span into DOM
                                attrib = Attributes(ret).convert()
                                elem[i] = ET.Element(moin_page.span, attrib=attrib)
                            elif (isinstance(body[0], ET.Node) and
                                  (len(body) > 1 or body[0].tag.name not in ('p', 'object', 'a'))):
                                # Complex case: "some text {{BlockItem}} more text" or "\n{{BlockItem}}\n" where
                                # the BlockItem body contains multiple p's, a table, preformatted text, etc.
                                # These block elements cannot be made a child of the current elem, so we create
                                # a container to replace elem.
                                # Create nodes to hold any siblings before and after current child (elem[i])
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i]
                                after[:] = elem[i + 1:]
                                if len(before):
                                    # there are siblings before transclude, save them in container
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # get attributes from page node;
                                # we expect {class: "moin-transclusion"; data-href: "http://some.org/somepage"}
                                attrib = Attributes(ret).convert()
                                # current elem will likely be replaced by container so we need to copy data-lineno attr
                                if html.data_lineno in elem.attrib:
                                    attrib[html.data_lineno] = elem.attrib[html.data_lineno]
                                # make new div node to hold transclusion, copy children, and save in container
                                div = ET.Element(moin_page.div, attrib=attrib, children=body[:])
                                container.append(div)  # new_trans_ptr is index to this
                                if len(after):
                                    container.append(after)
                                if elem.tag.name == 'a':
                                    # invalid input [[MyPage|{{BlockItem}}]],
                                    # best option is to retain A-tag and fail html validation
                                    # TODO: error may not be obvious to user - add error message
                                    elem[i] = div
                                else:
                                    # move up 1 level in recursion where elem becomes the child and
                                    # is usually replaced by container
                                    return [container, new_trans_ptr]
                            else:
                                # default action for inline transclusions or odd things like
                                # circular transclusion error messages
                                classes = child.attrib.get(html.class_, '').split()
                                classes += ret.attrib.get(html.class_, '').split()
                                ret.attrib[html.class_] = ' '.join(classes)
                                elem[i] = ret
                        elif isinstance(ret, list):
                            # a container has been returned.
                            # Note: there are multiple places where a container may be constructed
                            ret_container, trans_ptr = ret
                            # trans_ptr points to the transclusion within ret_container.
                            # Here the transclusion will always contain a block level element
                            if elem.tag.name in NO_BLOCK_CHILDREN:
                                # Complex case, transclusion affects grand-parent, great-grand-parent, e.g.:
                                # "/* comment {{BlockItem}} */" or  "text ''italic {{BlockItem}} italic'' text"
                                # elem is an inline element, build a bigger container to replace elem's parent,
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i] + ret_container[0:trans_ptr]
                                after[:] = ret_container[trans_ptr + 1:] + elem[i + 1:]
                                if len(before):
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # child may have classes like "comment" that must be added to transcluded element
                                classes = child.attrib.get(moin_page.class_, '').split()
                                # must use moin_page.class_ above, but use html.class below per html_out.py code
                                classes += ret_container[trans_ptr].attrib.get(html.class_, '').split()
                                ret_container[trans_ptr].attrib[html.class_] = ' '.join(classes)
                                container.append(ret_container[trans_ptr])  # the transclusion
                                if len(after):
                                    container.append(after)
                                return [container, new_trans_ptr]
                            else:
                                # elem is a block element
                                for grandchild in child:
                                    if isinstance(grandchild, ET.Node) and grandchild.tag.name == 'include':
                                        # the include may have classes that must be added to transcluded element
                                        classes = grandchild.attrib.get(html.class_, '').split()
                                        classes += ret_container[trans_ptr].attrib.get(html.class_, '').split()
                                        ret_container[trans_ptr].attrib[html.class_] = ' '.join(classes)
                                # replace child element with the container generated in lower recursion
                                elem[i:i + 1] = ret_container  # elem[i] is the child
                        else:
                            # default action for any ret not fitting special cases above,
                            # e.g. transclusion is within a table cell
                            elem[i] = ret
                # we are finished with this child, advance to next sibling
                i += 1

        finally:
            # always undo the push made at the top of this call, also on early return or error
            self.stack.pop()

    def __call__(self, tree):
        """
        Expand all include elements within tree.

        :param tree: root element of the document to process, modified in place
        :returns: the same tree object
        """
        self.stack = []
        self.recurse(tree, None)
        return tree


# Make the include converter available through the default converter registry.
# It is registered with the moin document type for both type arguments
# (presumably input and output type, matching the parameters of _factory);
# _factory itself only returns a converter when called with includes='expandall'.
default_registry.register(Converter._factory, type_moin_document, type_moin_document)
